// Copyright 2017 ETH Zurich and University of Bologna.
// Copyright and related rights are licensed under the Solderpad Hardware
// License, Version 0.51 (the “License”); you may not use this file except in
// compliance with the License.  You may obtain a copy of the License at
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
// or agreed to in writing, software, hardware and materials distributed under
// this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
/////////////////////////////////////////////////////////
// includes
/////////////////////////////////////////////////////////
#include "mlGrad.h"
#include "mlShared.h"
#include "math_fns.h"
#include "bar.h"
#include "utils.h"
#include "bench.h"

/////////////////////////////////////////////////////////
// shared globals
/////////////////////////////////////////////////////////

    // Input test data: 900 floats placed in the ".heapsram" linker section.
    // main() hands mlGrad() the 225-element slice that starts at
    // fv0[225 * coreid], so the table holds four consecutive 225-sample
    // inputs. mlGrad() addresses a slice as img[y + 15 * x].
    __attribute__ ((section(".heapsram"))) static const float fv0[900] = { 0.570375383F, 0.657617033F, 0.64612633F,
    0.533763766F, 0.533158362F, 0.53759F, 0.454020441F, 0.32859987F, 0.36540848F,
    0.518997073F, 0.463570297F, 0.348572135F, 0.413014382F, 0.42422542F,
    0.397092432F, 0.62996763F, 0.637491524F, 0.619215965F, 0.532409072F,
    0.43628633F, 0.420253128F, 0.430276483F, 0.416637212F, 0.4482131F,
    0.480580151F, 0.405900151F, 0.339731455F, 0.433532417F, 0.506313682F,
    0.505941153F, 0.707628489F, 0.690695703F, 0.55804193F, 0.477530599F,
    0.370715022F, 0.347958386F, 0.461852342F, 0.477893412F, 0.421032369F,
    0.414258301F, 0.408221036F, 0.442145348F, 0.573305905F, 0.610452771F,
    0.537319303F, 0.619721055F, 0.649762809F, 0.527081072F, 0.41496861F,
    0.329554915F, 0.327371091F, 0.499550343F, 0.532582F, 0.466990024F,
    0.475864857F, 0.416902572F, 0.470264167F, 0.613834739F, 0.579091668F,
    0.493038416F, 0.500020325F, 0.487290472F, 0.498995185F, 0.41140753F,
    0.284076035F, 0.343117535F, 0.563766181F, 0.663998961F, 0.614299774F,
    0.514180839F, 0.368569762F, 0.415618151F, 0.546366811F, 0.562632382F,
    0.541050732F, 0.503178656F, 0.447943091F, 0.468476325F, 0.397143662F,
    0.350663036F, 0.475847602F, 0.577195644F, 0.662973464F, 0.698279142F,
    0.561233342F, 0.44094047F, 0.510912657F, 0.588345F, 0.585276544F,
    0.563939631F, 0.500784516F, 0.466858F, 0.453471869F, 0.345360428F,
    0.398187608F, 0.595026433F, 0.613098323F, 0.604943573F, 0.689836621F,
    0.643894F, 0.532368124F, 0.567907691F, 0.683792F, 0.685706854F, 0.640066087F,
    0.335992128F, 0.311046094F, 0.363005847F, 0.390011042F, 0.430408865F,
    0.525430262F, 0.60638231F, 0.643903255F, 0.669935524F, 0.623054266F,
    0.497315377F, 0.501369178F, 0.678653538F, 0.74784F, 0.688199401F,
    0.295918673F, 0.334359705F, 0.408022523F, 0.501292408F, 0.510217369F,
    0.408261299F, 0.429982305F, 0.586608827F, 0.617763102F, 0.557568967F,
    0.492190719F, 0.477862716F, 0.557847142F, 0.57553F, 0.479358613F,
    0.435387194F, 0.490493923F, 0.544110835F, 0.514647841F, 0.505888522F,
    0.402110964F, 0.320509732F, 0.483248383F, 0.559449911F, 0.502640486F,
    0.513368607F, 0.504762471F, 0.431108445F, 0.383894175F, 0.306279689F,
    0.418220937F, 0.383660048F, 0.513636887F, 0.525292695F, 0.485129088F,
    0.413181037F, 0.342828363F, 0.489702761F, 0.590186775F, 0.521347463F,
    0.521739066F, 0.533707321F, 0.45359239F, 0.447879851F, 0.393175066F,
    0.428315967F, 0.340824932F, 0.506960392F, 0.538066328F, 0.369057953F,
    0.259102911F, 0.299870849F, 0.48802802F, 0.602885485F, 0.535740137F,
    0.45771566F, 0.448358446F, 0.483978868F, 0.547188401F, 0.480158776F,
    0.493667275F, 0.447012484F, 0.515056074F, 0.468720227F, 0.315522581F,
    0.254702866F, 0.278253198F, 0.359735936F, 0.476559699F, 0.486493796F,
    0.430286497F, 0.438674927F, 0.479908377F, 0.517742872F, 0.526367247F,
    0.341234088F, 0.36655432F, 0.393128544F, 0.422686875F, 0.413922459F,
    0.401496321F, 0.381707609F, 0.356192857F, 0.44029811F, 0.510984182F,
    0.492791325F, 0.491649628F, 0.461277425F, 0.479610234F, 0.596540451F,
    0.267520338F, 0.295884877F, 0.381329119F, 0.477571636F, 0.498292625F,
    0.489341527F, 0.501964569F, 0.468927234F, 0.45598641F, 0.489024F,
    0.486926883F, 0.443551332F, 0.429459453F, 0.491381019F, 0.561445355F,
    0.477355927F, 0.583861F, 0.579803646F, 0.52354908F, 0.572982848F,
    0.639422655F, 0.66424793F, 0.658028364F, 0.540062964F, 0.4061701F, 0.479748F,
    0.508002579F, 0.410487264F, 0.554353F, 0.647629142F, 0.395488024F,
    0.427297145F, 0.558652639F, 0.595369F, 0.613797605F, 0.691569209F,
    0.713298202F, 0.634803355F, 0.489962071F, 0.406654239F, 0.501191854F,
    0.553565443F, 0.484794617F, 0.554139912F, 0.591870129F, 0.37131232F,
    0.367338181F, 0.458741367F, 0.451116443F, 0.435792416F, 0.602391541F,
    0.716561377F, 0.701999545F, 0.656109F, 0.552596152F, 0.455299765F,
    0.480281204F, 0.594044924F, 0.578972459F, 0.480415344F, 0.348857522F,
    0.439169347F, 0.41374439F, 0.34477219F, 0.367760241F, 0.528208435F,
    0.666271269F, 0.696606576F, 0.715683639F, 0.677002F, 0.526939094F,
    0.458766103F, 0.536742568F, 0.470953345F, 0.381887853F, 0.391261876F,
    0.527135968F, 0.533863783F, 0.493574947F, 0.520370901F, 0.571554244F,
    0.586699486F, 0.533677399F, 0.495663613F, 0.584686875F, 0.582646728F,
    0.454031438F, 0.407812536F, 0.369897038F, 0.369427413F, 0.470775217F,
    0.517221868F, 0.532019198F, 0.520380437F, 0.513537169F, 0.457085878F,
    0.390528798F, 0.404939592F, 0.419836313F, 0.556877077F, 0.675233F,
    0.57592535F, 0.452476501F, 0.393321186F, 0.410377324F, 0.525109351F,
    0.476451606F, 0.404799968F, 0.350557953F, 0.320324272F, 0.262258112F,
    0.232897431F, 0.368432194F, 0.50718677F, 0.638994753F, 0.738070369F,
    0.62206161F, 0.444740713F, 0.3664428F, 0.356527507F, 0.576730967F,
    0.48393473F, 0.399824321F, 0.342513621F, 0.372050613F, 0.391384065F,
    0.332142442F, 0.378779083F, 0.494447321F, 0.549469352F, 0.561406314F,
    0.48054263F, 0.401454896F, 0.368874967F, 0.282835931F, 0.503160775F,
    0.397553682F, 0.472967714F, 0.494010717F, 0.540227771F, 0.626645207F,
    0.560775697F, 0.467986971F, 0.456514686F, 0.378706068F, 0.369116098F,
    0.477598101F, 0.552729905F, 0.514218211F, 0.3318775F, 0.426543742F,
    0.304411799F, 0.458455741F, 0.565337181F, 0.578742385F, 0.647063F,
    0.64630717F, 0.560721F, 0.50613308F, 0.401807785F, 0.335897356F,
    0.451519459F, 0.624419808F, 0.638153374F, 0.472327322F, 0.456350356F,
    0.404413313F, 0.464106977F, 0.571858883F, 0.580022931F, 0.532992601F,
    0.504552F, 0.513900042F, 0.564753234F, 0.520632565F, 0.338111103F,
    0.280943394F, 0.415544301F, 0.512123108F, 0.513786674F, 0.338333547F,
    0.475184739F, 0.536497831F, 0.500198245F, 0.460171521F, 0.42879948F,
    0.455266416F, 0.53435564F, 0.599676371F, 0.578680277F, 0.425839752F,
    0.283581823F, 0.224410087F, 0.238721535F, 0.36297524F, 0.28693065F,
    0.423656672F, 0.540284455F, 0.489114136F, 0.422452807F, 0.492161453F,
    0.646798F, 0.692417681F, 0.618423223F, 0.596999F, 0.547420859F, 0.404666454F,
    0.315268964F, 0.289610237F, 0.372223407F, 0.468167543F, 0.428697348F,
    0.469406962F, 0.494722486F, 0.416521132F, 0.490517765F, 0.707147F,
    0.681241751F, 0.502032638F, 0.53170836F, 0.588431895F, 0.523635447F,
    0.529506F, 0.498572946F, 0.428906679F, 0.503629506F, 0.406570554F,
    0.346382529F, 0.3294245F, 0.2793504F, 0.356780738F, 0.52579844F, 0.46348393F,
    0.345264941F, 0.478629917F, 0.609759152F, 0.631097078F, 0.639506221F,
    0.523963809F, 0.330605239F, 0.403682709F, 0.595998645F, 0.573376417F,
    0.559280455F, 0.526649773F, 0.414441407F, 0.37922737F, 0.47535345F,
    0.497775614F, 0.401533484F, 0.410084426F, 0.490879804F, 0.526628435F,
    0.497878373F, 0.468214214F, 0.460431039F, 0.615953803F, 0.54305464F,
    0.499617308F, 0.447377473F, 0.455182672F, 0.520425439F, 0.561045587F,
    0.560657918F, 0.414601654F, 0.366306871F, 0.472394049F, 0.440638065F,
    0.357558F, 0.338804334F, 0.50071305F, 0.5259161F, 0.468774229F, 0.487225264F,
    0.437529027F, 0.44985646F, 0.546538651F, 0.525878608F, 0.544616222F,
    0.554994524F, 0.501118839F, 0.503431857F, 0.449981421F, 0.440766603F,
    0.469517171F, 0.510180295F, 0.441066653F, 0.436782658F, 0.526091516F,
    0.493957192F, 0.426045746F, 0.416923553F, 0.415977627F, 0.511040032F,
    0.544216F, 0.501982808F, 0.475399911F, 0.514988363F, 0.634521186F,
    0.610552073F, 0.498238683F, 0.425174683F, 0.468259364F, 0.624955535F,
    0.671321034F, 0.522902906F, 0.344153166F, 0.384253681F, 0.503045321F,
    0.435612112F, 0.373607904F, 0.338921547F, 0.452269524F, 0.623775244F,
    0.586880445F, 0.494274288F, 0.463338017F, 0.533796966F, 0.643938124F,
    0.739420593F, 0.575829804F, 0.370470703F, 0.518050909F, 0.612966478F,
    0.451624274F, 0.317059338F, 0.238534465F, 0.31512922F, 0.445570976F,
    0.479917496F, 0.538610399F, 0.607956231F, 0.597737193F, 0.498414904F,
    0.554245472F, 0.497028291F, 0.38781786F, 0.559042156F, 0.644185722F,
    0.553146303F, 0.480924875F, 0.380155F, 0.31646058F, 0.354904175F,
    0.43222174F, 0.649948955F, 0.724463701F, 0.646152794F, 0.427104026F,
    0.390616447F, 0.422163755F, 0.388947874F, 0.500350177F, 0.647138119F,
    0.661926806F, 0.580148578F, 0.444552183F, 0.377513F, 0.43606326F,
    0.469638348F, 0.703156054F, 0.675012589F, 0.566831708F, 0.447067022F,
    0.378223628F, 0.377574563F, 0.438048512F, 0.54449296F, 0.660567164F,
    0.683334053F, 0.5046134F, 0.334550023F, 0.374609143F, 0.408670455F,
    0.327881128F, 0.608988404F, 0.558302343F, 0.440035701F, 0.455524504F,
    0.439359426F, 0.437627435F, 0.510931611F, 0.494502425F, 0.49759993F,
    0.567383051F, 0.479768485F, 0.389741272F, 0.415097982F, 0.319958F,
    0.235463232F, 0.548334F, 0.485077053F, 0.423407674F, 0.532700956F,
    0.584559739F, 0.613823473F, 0.565359771F, 0.388139427F, 0.381617F,
    0.437052131F, 0.416472763F, 0.496167779F, 0.510148585F, 0.374988794F,
    0.346318275F, 0.48703897F, 0.406545103F, 0.380460799F, 0.50517F,
    0.606581867F, 0.682730377F, 0.607064307F, 0.362797648F, 0.363657594F,
    0.478157312F, 0.460856199F, 0.489751905F, 0.527504146F, 0.477483034F,
    0.415285379F, 0.36584729F, 0.302601963F, 0.298398107F, 0.387974232F,
    0.501583397F, 0.658454F, 0.642921269F, 0.400501132F, 0.376620978F,
    0.597954929F, 0.616496503F, 0.408615887F, 0.384886563F, 0.49649328F,
    0.449873328F, 0.42870602F, 0.382951945F, 0.376487136F, 0.387584388F,
    0.430636466F, 0.587614179F, 0.601863563F, 0.4626019F, 0.418268F,
    0.537163317F, 0.603798628F, 0.386509746F, 0.255398095F, 0.430579543F,
    0.491554946F, 0.580085635F, 0.502956629F, 0.48089537F, 0.402253F,
    0.324118435F, 0.426968127F, 0.553970873F, 0.576356292F, 0.52648592F,
    0.513352215F, 0.564723849F, 0.443413317F, 0.239688784F, 0.284181148F,
    0.360091418F, 0.516596198F, 0.519539416F, 0.403644979F, 0.414552659F,
    0.495380282F, 0.488233179F, 0.366845518F, 0.337861657F, 0.41890493F,
    0.496183336F, 0.567189813F, 0.575187564F, 0.607684076F, 0.664012671F,
    0.638898551F, 0.389594734F, 0.493699551F, 0.603779197F, 0.560664833F,
    0.489881784F, 0.453862607F, 0.350981086F, 0.313142866F, 0.363469303F,
    0.434249103F, 0.497448117F, 0.512972772F, 0.480327606F, 0.372489929F,
    0.356030077F, 0.381279F, 0.428376079F, 0.648483F, 0.586772382F, 0.431277514F,
    0.458029091F, 0.469196349F, 0.405028909F, 0.406660706F, 0.454644263F,
    0.40568614F, 0.363195658F, 0.364147753F, 0.308624148F, 0.347473979F,
    0.429440558F, 0.411668509F, 0.525552571F, 0.458038956F, 0.364928812F,
    0.427163869F, 0.492299825F, 0.401536852F, 0.38083F, 0.464979261F,
    0.378877908F, 0.301141351F, 0.342897743F, 0.378307074F, 0.472638F,
    0.441217542F, 0.520019829F, 0.553052247F, 0.50607878F, 0.479854614F,
    0.421470374F, 0.34823373F, 0.335539F, 0.423578203F, 0.507829547F,
    0.47256282F, 0.469134748F, 0.507110655F, 0.505617499F, 0.52728796F,
    0.381732762F, 0.559967875F, 0.672490478F, 0.646646202F, 0.658956468F,
    0.599873304F, 0.419620842F, 0.412774205F, 0.515286326F, 0.480891138F,
    0.471651912F, 0.581770539F, 0.632894635F, 0.599760115F, 0.529085279F,
    0.448638827F, 0.582430959F, 0.705944955F, 0.655893803F, 0.646911919F,
    0.669633269F, 0.563046932F, 0.522176743F, 0.541544557F, 0.440499038F,
    0.404549628F, 0.470998883F, 0.530875564F, 0.538959146F, 0.438919604F,
    0.548433125F, 0.572496653F, 0.648529F, 0.580351591F, 0.513547301F,
    0.563711703F, 0.528548241F, 0.501780868F, 0.566721141F, 0.518252969F,
    0.410442024F, 0.37692818F, 0.449602664F, 0.483719945F, 0.42222324F,
    0.532318592F, 0.50804019F, 0.537635684F, 0.390574217F, 0.297409803F,
    0.428356498F, 0.509273589F, 0.560166538F, 0.642535508F, 0.627441227F,
    0.532165051F, 0.458558798F, 0.456478059F, 0.446335226F, 0.408380717F,
    0.451647341F, 0.429355145F, 0.362802386F, 0.242448732F, 0.280439287F,
    0.430675834F, 0.524268866F, 0.670189202F, 0.731848776F, 0.660232723F,
    0.604807436F, 0.539584219F, 0.480038971F, 0.447091728F, 0.364396602F,
    0.429402709F, 0.404007822F, 0.364486754F, 0.388387501F, 0.48798117F,
    0.551784F, 0.553619385F, 0.674165964F, 0.731815636F, 0.553762257F, 0.429572F,
    0.456074417F, 0.469638646F, 0.436291397F, 0.392181307F, 0.570146203F,
    0.498411596F, 0.617397487F, 0.689360321F, 0.565817058F, 0.528460801F,
    0.608590961F, 0.637937784F, 0.604911864F, 0.441592425F, 0.337279767F,
    0.452355802F, 0.527363896F, 0.487721473F, 0.496476084F, 0.606423259F,
    0.530697286F, 0.70179987F, 0.775146425F, 0.564645529F, 0.466438025F,
    0.625419438F, 0.640024781F, 0.552448511F, 0.488511264F, 0.45549041F,
    0.54749316F, 0.546719551F, 0.498181671F, 0.525942385F, 0.512131393F,
    0.455278724F, 0.485543638F, 0.525394917F, 0.505369723F, 0.492019355F,
    0.595876336F, 0.589951F, 0.522969842F, 0.483099878F, 0.414021164F,
    0.42867589F, 0.386378407F, 0.367713332F, 0.421858668F, 0.526706755F,
    0.369204849F, 0.262596875F, 0.264358252F, 0.329695523F, 0.407567978F,
    0.450216085F, 0.392485976F, 0.39628765F, 0.452813178F, 0.429338098F,
    0.380719692F, 0.330165118F, 0.347129077F, 0.392279446F };

    // Golden acceptance bounds, placed in the ".heapsram" linker section.
    // main() passes the 4-element slice starting at fv1[coreid << 2] to
    // checkRes(), so each group of four values belongs to one input data set:
    //   [0] upper bound for sum(y)   [1] lower bound for sum(y)
    //   [2] upper bound for var(y)   [3] lower bound for var(y)
    __attribute__ ((section(".heapsram"))) static const float fv1[16] = { 22.0982056F, 22.0977631F, 0.00450053439F,
    0.00450044405F, 26.793499F, 26.7929649F, 0.00580479903F, 0.00580468262F,
    25.2313404F, 25.2308369F, 0.00502731558F, 0.005027215F, 25.5063152F,
    25.5058041F, 0.00635486934F, 0.00635474268F };


/////////////////////////////////////////////////////////
// subfunctions
/////////////////////////////////////////////////////////


/*
 * mlGrad - gradient magnitude of a 15x15 input using 3x3 Sobel masks.
 *
 * img  : 225 input samples, addressed as img[y + 15 * x].
 * filt : 169 output samples, addressed as filt[y + 13 * x]. Each entry is
 *        0.25 * sqrt(gx^2 + gy^2) for the 3x3 window whose first sample
 *        is img[y + 15 * x].
 *
 * When FMA is defined the sum of squares is formed through fFMA(); otherwise
 * a plain multiply/add expression is used.
 */
void mlGrad(const float img[225], float filt[169])
{
  int outer;
  int inner;

  for (outer = 0; outer < 13; outer++) {
    /* the three consecutive 15-sample lines spanned by the 3x3 window */
    const float *line0 = &img[15 * outer];
    const float *line1 = &img[15 * (1 + outer)];
    const float *line2 = &img[15 * (2 + outer)];
    float *dst = &filt[13 * outer];

    for (inner = 0; inner < 13; inner++) {
      float gx;
      float gy;

      /*  sobel mask in x-direction: last line minus first line, weights 1-2-1 */
      gx = (((line2[inner] - line0[inner])
             + 2.0F * (line2[inner + 1] - line0[inner + 1]))
            + line2[inner + 2])
        - line0[inner + 2];

      /*  sobel mask in y-direction: third sample minus first sample, weights 1-2-1 */
      gy = (((line0[inner + 2] - line0[inner])
             + 2.0F * (line1[inner + 2] - line1[inner]))
            + line2[inner + 2])
        - line2[inner];

#ifdef FMA
      dst[inner] = (real32_T)fSqrt(fFMA(gx, gx, gy*gy)) * 0.25F;
#else
      dst[inner] = (real32_T)fSqrt(gx * gx + gy * gy) * 0.25F;
#endif
    }
  }
}


/*
 * checkRes - compare two result values against golden bounds.
 *
 * check  : the two values to verify.
 * golden : for value k, golden[2k] is the upper bound and golden[2k + 1]
 *          the lower bound (both inclusive).
 *
 * printErrors() is invoked once per value with the "out of range" flag, the
 * value index, the value itself and both bounds.
 *
 * Returns true only if both values lie inside their bounds.
 */
boolean_T checkRes(const float check[2], const float golden[4])
{
  boolean_T allOk = true;
  int idx;

  for (idx = 0; idx < 2; idx++) {
    const float upper = golden[idx << 1];
    const float lower = golden[1 + (idx << 1)];
    boolean_T inRange = (check[idx] <= upper) && (check[idx] >= lower);

    printErrors(!inRange, idx, check[idx] ,upper , lower);
    allOk = inRange && allOk;
  }

  return allOk;
}

/*
 * var - sample variance of 169 values.
 *
 * The mean is the sequential sum of x[0..168] divided by 169; the result is
 * the sequential sum of squared deviations from that mean divided by 168.
 * Both divisions go through fDiv().
 */
float var(const float x[169])
{
  float mean;
  float dev;
  float acc;
  int i;

  /* first pass: mean */
  mean = x[0];
  for (i = 1; i < 169; i++) {
    mean += x[i];
  }
  mean = fDiv(mean,169.0F);

  /* second pass: accumulated squared deviations */
  dev = x[0] - mean;
  acc = dev * dev;
  for (i = 1; i < 169; i++) {
    dev = x[i] - mean;
    acc += dev * dev;
  }

  return fDiv(acc,168.0F);
}

/*
 * sum - add up 169 values in index order, starting from x[0].
 */
float sum(const float x[169])
{
  const float *cur = x + 1;
  const float *const end = x + 169;
  float total = x[0];

  while (cur != end) {
    total += *cur;
    cur++;
  }

  return total;
}

/////////////////////////////////////////////////////////
// main testing function 
/////////////////////////////////////////////////////////
/*
 * Benchmark entry point, executed by every core.
 *
 * Each core picks one of the four 225-sample inputs in fv0, runs mlGrad() on
 * it getKernelIts() times between perf_begin()/perf_end(), then checks the
 * sum and variance of the last output against the matching bounds in fv1.
 *
 * Returns 0 when the check passes and 1 otherwise.
 */
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  /* number of input / golden data sets stored in fv0 and fv1 */
  enum { NUM_DATA_SETS = 4 };

  int coreid, k;
  boolean_T pass;
  float y[169];
  float tmp[2];
  
  /////////////////////////////////////////////////////////
  // main test loop 
  // each core loops over a kernel instance
  /////////////////////////////////////////////////////////
  
  coreid = get_core_id();

  printf("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid);
  
  /*
   * Fold the core id onto one of the available data sets. For ids 0..7 this
   * yields the same index as the former "if (coreid > 3) coreid -= 4", but it
   * also keeps any other id from indexing past the end of fv0 / fv1.
   */
  coreid = (int)((unsigned int)coreid % NUM_DATA_SETS);

  synch_barrier();

  perf_begin();

  for(k = 0; k < getKernelIts(); k++)
  {
    // matlab kernel
    mlGrad(&fv0[225 * coreid], y);
  }

  synch_barrier();

  perf_end();

  /////////////////////////////////////////////////////////
  // check results
  /////////////////////////////////////////////////////////

  synch_barrier();

  tmp[0] = sum(y);
  tmp[1] = var(y);
  /* four golden bounds per data set: sum upper/lower, var upper/lower */
  pass   = checkRes(tmp, &fv1[4 * coreid]);
  
  /* report under the real core id, not the folded data-set index */
  flagPassFail(pass, get_core_id());
  
  synch_barrier();

/////////////////////////////////////////////////////////
// synchronize and exit
/////////////////////////////////////////////////////////

  return !pass;
}

